

## Install (only if missing) and load packages, then read the base data set.

# democracyData is only available from GitHub. Installing it on every run
# needs network access and can silently change the package version, so it
# is installed only when it is not already available.
if (!requireNamespace("democracyData", quietly = TRUE)) {
  remotes::install_github("xmarquez/democracyData")
}
library(democracyData)

# p_load() attaches the listed packages, installing any that are missing.
pacman::p_load(tidyverse, fastDummies, countrycode)

# Merged regime measures; covariates are joined onto this frame below.
df <- readRDS("Data/Regimedata/Measures_merged.rds")


## Check whether a set of columns uniquely identifies the rows of a data frame.
##
## Arguments:
##   x    A data frame. Grouped input is ungrouped before checking.
##   ...  Columns forming the candidate key; passed to dplyr::select() and,
##        when duplicates are reported, to dplyr::arrange().
##
## Returns:
##   TRUE when every combination of the key columns occurs exactly once.
##   Otherwise a data frame with all rows of `x` whose key occurs more than
##   once, sorted by the key columns (note: not FALSE).
unique_id <- function(x, ...) {
  # On grouped input select() silently re-adds the grouping columns to the
  # key, and the whole-frame duplicated() mask used below does not have the
  # per-group length that a grouped filter() expects. Ungroup up front so
  # callers do not have to remember to do it.
  x <- ungroup(x)

  id_set <- x %>% select(...)

  if (nrow(id_set) == nrow(distinct(id_set))) {
    return(TRUE)
  }

  # Each offending key once, regardless of how often it is repeated.
  non_unique_ids <- id_set %>%
    filter(duplicated(id_set)) %>%
    distinct()

  # Natural join on the shared (key) columns pulls back the full rows;
  # suppressMessages() hides the "Joining with `by = ...`" note.
  suppressMessages(
    inner_join(non_unique_ids, x) %>% arrange(...)
  )
}
# ---- Conflict onsets --------------------------------------------------------
# Load the onset data.
onsets <- read_csv("Data/Covariates/ucdp-intrastate-count-onset.csv")

# Attach standardized country codes, matching on the `name` column.
onsets <- onsets %>%
  country_year_coder(
    name,
    year,
    match_type = "country",
    verbose = FALSE,
    include_in_output = c(
      "extended_country_name",
      "GWn", "cown",
      "polity_ccode",
      "in_GW_system",
      "in_cow_system"
    )
  )

# Diagnostics: available columns and number of missing values per code column.
names(onsets)
vapply(
  onsets %>% select(cown, GWn, polity_ccode, name),
  function(x) sum(is.na(x)),
  integer(1)
)

# Final_Code is the merge key: cown, falling back to GWn where cown is
# missing. Rows that receive no code at all are dropped (as is done for the
# other covariate sources in this script): left_join() matches NA keys to NA
# keys by default, so they could otherwise attach to uncoded rows of `df`.
onsets <- onsets %>%
  mutate(Final_Code = coalesce(cown, GWn)) %>%
  filter(!is.na(Final_Code))

# Report any (Final_Code, year) combinations that occur more than once.
onsets %>% unique_id(Final_Code, year)

# Keep the first row per (Final_Code, year) and only the columns to merge.
onsets <- onsets %>%
  distinct(Final_Code, year, .keep_all = TRUE) %>%
  select(year, Final_Code, onset1)

# Join keys shared by all covariate merges in this script.
mergecols <- c("year", "Final_Code")

df <- left_join(df, onsets, by = mergecols)



# ---- V-Dem covariates -------------------------------------------------------
vdem <- readRDS("Data/Regimedata/vdem13.rds")

# Keep 1899 as well: the merge below only uses years >= 1900, and the extra
# year gives 1900 a previous observation for the lag.
vdem <- vdem %>% filter(year >= 1899)

# Diagnostics: distributions of the variables used below.
summary(vdem$v2cagenmob)
summary(vdem$e_pt_coup_attempts)
summary(vdem$v2x_clphy)
summary(vdem$v2reginfo)

# Attach standardized country codes, matching on `country_name`.
vdem <- vdem %>%
  country_year_coder(
    country_name,
    year,
    match_type = "country",
    verbose = FALSE,
    include_in_output = c(
      "extended_country_name",
      "GWn", "cown",
      "polity_ccode",
      "in_GW_system",
      "in_cow_system",
      "in_polity_system",
      "polity_startdate",
      "polity_enddate"
    )
  )

# Diagnostics: number of missing values per identifier column.
vapply(
  vdem %>% select(COWcode, cown, GWn, polity_ccode, country_id, country_name),
  function(x) sum(is.na(x)),
  integer(1)
)

# Final_Code is the merge key: cown, then GWn, then polity_ccode.
# Rows without any code cannot be merged and are dropped.
vdem <- vdem %>%
  mutate(Final_Code = coalesce(cown, GWn, polity_ccode)) %>%
  filter(!is.na(Final_Code))

# Report duplicated (year, Final_Code) keys, then keep the first row per key.
# This happens BEFORE the lag is computed so that "previous observation"
# can never be a second row for the same year.
vdem %>% ungroup() %>% unique_id(year, Final_Code)
vdem <- vdem %>% distinct(Final_Code, year, .keep_all = TRUE)

vdem <- vdem %>%
  group_by(Final_Code) %>%
  arrange(year) %>%
  mutate(
    # Previous row's v2reginfo within the same Final_Code.
    # NOTE(review): this is the previous *row*, not necessarily the previous
    # calendar year, if a unit has gaps in its series -- confirm.
    L_reginfo = dplyr::lag(v2reginfo, 1),
    # 1 when v2reginfo differs from the previous row, 0 when equal,
    # NA when either value is missing.
    reg_breakdown = case_when(
      v2reginfo == L_reginfo ~ 0,
      v2reginfo != L_reginfo ~ 1
    ),
    # 1 when e_pt_coup_attempts is at least 1, else 0 (NA stays NA).
    coup_attempt = if_else(e_pt_coup_attempts >= 1, 1, 0)
  ) %>%
  ungroup() %>%
  select(
    year, Final_Code, v2reginfo, reg_breakdown,
    v2x_clphy, coup_attempt, v2cagenmob
  )

df <- left_join(df, vdem %>% filter(year >= 1900), by = mergecols)

# Checkpoint: write the intermediate result and read it back.
saveRDS(df, file = "Data/Regimedata/Measures_merged_covariates.rds")
df <- readRDS("Data/Regimedata/Measures_merged_covariates.rds")

# Diagnostics on the merged frame.
names(df)
sum(is.na(df$extended_country_name))


# ---- GDP per capita estimates ----------------------------------------------
farris <- readRDS("Data/Covariates/estimates_gdppc_model_combined_normal_noslope_gamma_lambda_additive_test_20220215.rds")

# Keep only the latent GDP per capita indicator from 1900 onwards.
farris <- farris %>% filter(year >= 1900 & indicator == "latent_gdppc")

# Translate the numeric `gwno` code into a country name so that
# country_year_coder() can match on it; then report the share unmatched.
farris$country <- countrycode(
  farris$gwno,
  origin = "gwn",
  destination = "country.name"
)
sum(is.na(farris$country)) / nrow(farris)

farris <- farris %>%
  country_year_coder(
    country,
    year,
    match_type = "country",
    verbose = FALSE,
    include_in_output = c(
      "extended_country_name",
      "GWn", "cown",
      "polity_ccode",
      "in_GW_system",
      "in_cow_system",
      "in_polity_system",
      "polity_startdate",
      "polity_enddate"
    )
  )

# Final_Code is the merge key: cown, falling back to GWn.
farris <- farris %>% mutate(Final_Code = coalesce(cown, GWn))
# Share of rows that could not be assigned a code.
sum(is.na(farris$Final_Code)) / nrow(farris)

farris <- farris %>% select(Final_Code, year, GDP_PC_Farris = mean)

# Take care of multiple units for the same state (due to dissolution etc.):
# average them into one value per (Final_Code, year). Grouping is dropped so
# the result is a plain, already-unique frame.
farris <- farris %>%
  filter(!is.na(Final_Code)) %>%
  group_by(Final_Code, year) %>%
  summarize(
    GDP_PC_Farris = mean(GDP_PC_Farris, na.rm = TRUE),
    .groups = "drop"
  )

# Sanity check: should report TRUE after the aggregation above.
farris %>% unique_id(year, Final_Code)

df <- left_join(df, farris, by = mergecols)

names(df)

# Growth relative to the previous row within each Final_Code, plus an
# indicator for negative growth.
# NOTE(review): the lag is the previous *row* per Final_Code; if `df` has
# gaps in years or rows with NA Final_Code, this is not strictly
# year-on-year growth -- confirm.
df <- df %>%
  group_by(Final_Code) %>%
  arrange(year) %>%
  mutate(
    gdppc_growth = (GDP_PC_Farris - dplyr::lag(GDP_PC_Farris, 1)) /
      dplyr::lag(GDP_PC_Farris, 1),
    negative_growth = if_else(gdppc_growth < 0, 1, 0)
  ) %>%
  ungroup()










# ---- One-step leads of the democracy indicators -----------------------------
# For each listed indicator, add a column F_<name> holding the value from the
# next row (ordered by year) within the same Final_Code.
df <- df %>%
  group_by(Final_Code) %>%
  arrange(year) %>%
  mutate(
    across(
      all_of(c(
        "RegType_lied_DEM",
        "RegType_RoW_DEM",
        "Politytype_Democracy",
        "status_fh_F",
        "AnckarRegtype_Democracy",
        "HTW_RegType_Democracy",
        "RegType_magaloni_DEM"
      )),
      ~ dplyr::lead(.x, 1),
      .names = "F_{.col}"
    )
  ) %>%
  # Drop the grouping so the saved object is a plain (ungrouped) data frame.
  ungroup()

saveRDS(df, file = "Data/Regimedata/Measures_merged_covariates.rds")
